import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
iplball = pd.read_csv('./iplball.csv')
iplball.head()
| id | inning | over | ball | batsman | non_striker | bowler | batsman_runs | extra_runs | total_runs | non_boundary | is_wicket | dismissal_kind | player_dismissed | fielder | extras_type | batting_team | bowling_team | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 335982 | 1 | 6 | 5 | RT Ponting | BB McCullum | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 1 | 335982 | 1 | 6 | 6 | BB McCullum | RT Ponting | AA Noffke | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 2 | 335982 | 1 | 7 | 1 | BB McCullum | RT Ponting | Z Khan | 0 | 0 | 0 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 3 | 335982 | 1 | 7 | 2 | BB McCullum | RT Ponting | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
| 4 | 335982 | 1 | 7 | 3 | RT Ponting | BB McCullum | Z Khan | 1 | 0 | 1 | 0 | 0 | NaN | NaN | NaN | NaN | Kolkata Knight Riders | Royal Challengers Bangalore |
w_kind = ['caught', 'bowled', 'lbw', 'stumped', 'caught and bowled', 'hit wicket']
wickets = iplball[iplball['dismissal_kind'].isin(w_kind)]
all_wickets = wickets.groupby(['bowler','id','over','ball'])[['is_wicket']].sum().reset_index()
most_wickets = all_wickets.groupby('bowler')[['is_wicket']].count().sort_values('is_wicket',ascending=False).rename(columns={'is_wicket':'total_wickets'}).reset_index()
over_balls = iplball.groupby(['bowler','id','inning','over','ball'])[['total_runs','extra_runs']].sum().reset_index()
total_balls = over_balls.groupby(['bowler',])[['ball']].sum().reset_index().rename(columns={'ball':'total_balls_bowled'})
most_balls_in_single_over = over_balls.groupby('bowler')[['ball']].max().reset_index().rename(columns={'ball':'most_balls_in_single_over'})
matches = over_balls.groupby('bowler')['id'].value_counts().to_frame('match').reset_index().groupby('bowler')['id'].count().to_frame('matches').reset_index()
extras = over_balls.groupby('bowler')[['extra_runs','total_runs']].sum().reset_index()
economy = over_balls.groupby(['bowler','id','over'])[['total_runs']].sum().reset_index().groupby('bowler')[['total_runs']].mean().rename(columns={'total_runs':'economy'}).reset_index()
wicket_in_a_match = wickets.groupby(['bowler','id'])[['is_wicket']].sum().reset_index()
most_wicket_in_a_match=wicket_in_a_match.groupby('bowler')[['is_wicket']].max().reset_index()
most_wicket_in_a_match_id = pd.merge(most_wicket_in_a_match,wicket_in_a_match,how='left',left_on=['bowler','is_wicket'],right_on=['bowler','is_wicket']).rename(columns={'is_wicket':'most_wickets_in_a_match'})
total_runs_in_match = iplball.groupby(['bowler','id'])[['total_runs']].sum().reset_index()
b_fig = pd.merge(most_wicket_in_a_match_id,total_runs_in_match,how='left',left_on=['bowler','id'],right_on=['bowler','id']).sort_values(['bowler','most_wickets_in_a_match','total_runs'],ascending=False)
b_fig['bowler'] = b_fig['bowler'].drop_duplicates(keep='last')
b_fig.dropna(inplace = True)
b_fig['best_figure'] = b_fig['most_wickets_in_a_match'].astype(str)+ "-" + b_fig['total_runs'].astype(str)
best_figures = b_fig[['bowler','best_figure']]
dot_balls = over_balls[over_balls['total_runs']==0].groupby('bowler')['total_runs'].count().to_frame('dot_balls').reset_index()
fours_con = over_balls[(over_balls['total_runs']-over_balls['extra_runs'])==4].groupby('bowler')['total_runs'].count().to_frame('fours').reset_index()
sixes_con = over_balls[(over_balls['total_runs']-over_balls['extra_runs'])==6].groupby('bowler')['total_runs'].count().to_frame('sixes').reset_index()
f_s = pd.merge(fours_con,sixes_con,how='left',left_on=['bowler'],right_on=['bowler'])
f_s['sixes'] = f_s['sixes'].fillna(0).astype(int)
f_s['boundaries_4-6'] = f_s.fours.astype(str) + '-' + f_s.sixes.astype(str)
boundaries = f_s[['bowler','boundaries_4-6']]
merge1 = pd.merge(most_balls_in_single_over,most_wickets,how='left',left_on=['bowler'],right_on=['bowler'])
merge2 = pd.merge(merge1,total_balls,how='left',left_on=['bowler'],right_on=['bowler'])
merge3 = pd.merge(merge2,extras,how='left',left_on=['bowler'],right_on=['bowler'])
merge3['strike_rate']=merge3.total_balls_bowled/merge3.total_wickets
merge3['average']=merge3.total_runs/merge3.total_wickets
merge4 = pd.merge(merge3,economy,how='left',left_on=['bowler'],right_on=['bowler'])
merge5 = pd.merge(merge4,best_figures,how='left',left_on=['bowler'],right_on=['bowler'])
merge6 = pd.merge(merge5,dot_balls,how='left',left_on=['bowler'],right_on=['bowler'])
merge7 = pd.merge(merge6,boundaries,how='left',left_on=['bowler'],right_on=['bowler'])
merge8 = pd.merge(merge7,matches,how='left',left_on=['bowler'],right_on=['bowler'])
bowling = merge8.sort_values('total_wickets',ascending=False).fillna(0)
bowling = bowling[['bowler','matches', 'total_wickets',
'total_balls_bowled', 'total_runs','dot_balls','extra_runs',
'average', 'strike_rate','economy', 'best_figure', 'boundaries_4-6','most_balls_in_single_over',]]
bowling
| bowler | matches | total_wickets | total_balls_bowled | total_runs | dot_balls | extra_runs | average | strike_rate | economy | best_figure | boundaries_4-6 | most_balls_in_single_over | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 350 | SL Malinga | 122 | 170.0 | 10895 | 3486 | 1144.0 | 293 | 20.505882 | 64.088235 | 7.292887 | 5-13 | 314-86 | 9 |
| 6 | A Mishra | 150 | 160.0 | 11563 | 3913 | 1125.0 | 125 | 24.456250 | 72.268750 | 7.396975 | 5-17 | 222-172 | 9 |
| 276 | PP Chawla | 163 | 156.0 | 11594 | 4330 | 1137.0 | 134 | 27.756410 | 74.320513 | 7.930403 | 4-21 | 316-181 | 8 |
| 99 | DJ Bravo | 137 | 153.0 | 10412 | 3869 | 860.0 | 210 | 25.287582 | 68.052288 | 8.447598 | 4-22 | 305-138 | 9 |
| 133 | Harbhajan Singh | 157 | 150.0 | 12352 | 4038 | 1244.0 | 170 | 26.920000 | 82.346667 | 7.172291 | 5-18 | 289-142 | 8 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 360 | SR Tendulkar | 4 | 0.0 | 133 | 59 | 4.0 | 2 | 0.000000 | 0.000000 | 9.833333 | 0 | 6-1 | 7 |
| 380 | Sunny Gupta | 1 | 0.0 | 70 | 47 | 0.0 | 2 | 0.000000 | 0.000000 | 15.666667 | 0 | 4-3 | 7 |
| 395 | Tejas Baroka | 1 | 0.0 | 69 | 33 | 6.0 | 0 | 0.000000 | 0.000000 | 8.250000 | 0 | 2-2 | 6 |
| 405 | VS Yeligati | 2 | 0.0 | 119 | 61 | 6.0 | 8 | 0.000000 | 0.000000 | 12.200000 | 0 | 7-2 | 7 |
| 411 | Y Gnaneswara Rao | 1 | 0.0 | 21 | 7 | 3.0 | 0 | 0.000000 | 0.000000 | 7.000000 | 0 | 1-0 | 6 |
420 rows × 13 columns
fig = px.scatter(bowling, x="matches", y="total_wickets",
size="total_runs", color="total_balls_bowled",
hover_name="bowler", log_x=True, size_max=40)
fig.show()
upto = range(0,20)
bowling = bowling.iloc[upto]
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
ax.bar(bowling.bowler,bowling.strike_rate,color='r')
ax.bar(bowling.bowler,bowling.average,bottom = bowling.strike_rate,color='b')
ax.bar(bowling.bowler,bowling.economy,color='g')
ax.set_ylabel('Total')
# ax.set_title('')
plt.xticks(rotation=90)
# ax.set_yticks()
ax.legend(labels=['Strike rate', 'Average','Economy'])
ax
<matplotlib.axes._axes.Axes at 0x223351fe1c0>
bowl_plot = bowling[['bowler','average','strike_rate','economy']]
bowl_plot1 = bowl_plot.melt(id_vars='bowler',value_vars=['strike_rate','average','economy'])
fig = px.bar(bowl_plot1, x="bowler", y="value",color='variable')
# Plot!
# st.plotly_chart(fig, use_container_width=True)
fig
# bowl_plot
bowl_plot = bowling[['bowler','total_balls_bowled','dot_balls','boundaries_4-6']]
bowl_plot = pd.merge(bowl_plot,f_s,how='left',left_on=['bowler'],right_on=['bowler'])
bowl_plot1 = bowl_plot.melt(id_vars='bowler',value_vars=['dot_balls','fours','sixes'])
bowl_plot1 = pd.merge(bowl_plot1,bowling[['bowler','total_balls_bowled']],how='left',left_on=['bowler'],right_on=['bowler'])
fig = px.bar(bowl_plot1, x="bowler", y="value",color='variable',hover_data=['total_balls_bowled'])
fig
# bowl_plot1